{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true,
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": "①词错误，转文本错误          1552\n⑤词正确，无法确定货物为危化品      623\n确认是违禁品               519\n②词正确，用户仅聊到           481\n④方言，听不懂                5\n非危险品货源                 2\n②词正确，用户仅聊到             1\n ⑤词正确，无法确定货物为危化品       1\n ②词正确，用户仅聊到            1\n非危险品货源                 1\nName: reason, dtype: int64"
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "# Load the manual-review export; fields are delimited by the SOH control character (0x01).\n",
    "# The separator is spelled as an escape so it is visible in the source instead of a raw control byte.\n",
    "df = pd.read_csv(\"人工审核5月30.csv\", sep=\"\\x01\")\n",
    "def change_contraband_tag(item):\n",
    "    \"\"\"Collapse confirmed-contraband review notes into a single canonical label.\n",
    "\n",
    "    Args:\n",
    "        item: Row-like object whose \"reason\" entry is a string.\n",
    "\n",
    "    Returns:\n",
    "        \"确认是违禁品\" when the reason contains \"近期承运违规货源信息\";\n",
    "        otherwise the reason exactly as it was given.\n",
    "    \"\"\"\n",
    "    note = item[\"reason\"]\n",
    "    return \"确认是违禁品\" if \"近期承运违规货源信息\" in note else note\n",
    "# Keep only rows that have a reason (a missing reason means the row was not reviewed),\n",
    "# then rewrite the reason column through change_contraband_tag.\n",
    "df = (\n",
    "    df.dropna(axis=0, subset=[\"reason\"])\n",
    "    .assign(reason=lambda reviewed: reviewed.apply(change_contraband_tag, axis=1))\n",
    ")\n",
    "df[\"reason\"].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "outputs": [
    {
     "data": {
      "text/plain": "关键词错误,语音转文本出错     887\n确认是违禁品            322\n词正确,无法确定货物为危化品    310\n词正确,用户仅聊到         245\n语音或者文本,听不懂          4\nName: reason, dtype: int64"
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Discard rows without a reason: a missing reason means the row was not reviewed.\n",
    "df = df[df[\"reason\"].notna()]\n",
    "\n",
    "\n",
    "# Ordered (marker substring, canonical label) rules; the first marker found in a reason wins.\n",
    "_REASON_RULES = (\n",
    "    (\"近期承运违规货源信息\", \"确认是违禁品\"),\n",
    "    (\"近期发布违规货源信息\", \"确认是违禁品\"),\n",
    "    (\"词错误，转文本\", \"关键词错误,语音转文本出错\"),\n",
    "    (\"词错误，是转文本\", \"关键词错误,语音转文本出错\"),\n",
    "    (\"无法确定货物为危化品\", \"词正确,无法确定货物为危化品\"),\n",
    "    (\"无法确定是否为危化品\", \"词正确,无法确定货物为危化品\"),\n",
    "    (\"词正确，用户仅聊到\", \"词正确,用户仅聊到\"),\n",
    "    (\"听不懂\", \"语音或者文本,听不懂\"),\n",
    "    (\"未找到违禁词\", \"未找到违禁词\"),\n",
    "    (\"词正确，用户仅仅聊到\", \"词正确,用户仅聊到\"),\n",
    "    (\"水性的\", \"词正确,无法确定货物为危化品\"),\n",
    "    (\"货主打错字，实际是化肥\", \"关键词错误,语音转文本出错\"),\n",
    "    (\"非危险品货源\", \"词正确,无法确定货物为危化品\"),\n",
    "    (\"非违禁品货源\", \"词正确,无法确定货物为危化品\"),\n",
    ")\n",
    "\n",
    "\n",
    "def change_contraband_tag(item):\n",
    "    \"\"\"Map a free-text review reason onto its canonical category label.\n",
    "\n",
    "    The reason is tested against a fixed, ordered list of marker substrings;\n",
    "    the label paired with the first marker it contains is returned.\n",
    "\n",
    "    Args:\n",
    "        item: Row-like object whose \"reason\" entry is a string.\n",
    "\n",
    "    Returns:\n",
    "        The canonical label for the first matching marker, or the original\n",
    "        reason unchanged when no marker matches.\n",
    "    \"\"\"\n",
    "    raw_reason = item[\"reason\"]\n",
    "    for marker, canonical in _REASON_RULES:\n",
    "        if marker in raw_reason:\n",
    "            return canonical\n",
    "    return raw_reason\n",
    "# Rewrite every reason to its canonical label, then drop rows whose calltext is missing.\n",
    "df = (\n",
    "    df.assign(reason=lambda frame: frame.apply(change_contraband_tag, axis=1))\n",
    "    .dropna(subset=[\"calltext\"])\n",
    ")\n",
    "df[\"reason\"].value_counts()\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "outputs": [],
   "source": [
    "# Keep only rows whose reason is one of the two labels below and write them to CSV.\n",
    "kept_reasons = [\"确认是违禁品\", \"词正确,无法确定货物为危化品\"]\n",
    "df_filter = df[df[\"reason\"].isin(kept_reasons)]\n",
    "df_filter.to_csv(\"new_data.csv\")"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "outputs": [
    {
     "data": {
      "text/plain": "                                  call_id  cargo_tag  reason  \\\n4760  1201_9715_4294967295_20220411055430        1.0  确认是违禁品   \n5367          202204051957381807040422195        1.0  确认是违禁品   \n1953   C20220411165841AC10210529579346777        1.0  确认是违禁品   \n7676                   563334022344810496        1.0  确认是违禁品   \n3642   C20220412144732AC10213921271988858        1.0  确认是违禁品   \n...                                   ...        ...     ...   \n6954   C20220408103208AC10211907961862223        NaN  未找到违禁词   \n6956   C20220408123048AC10210586403137838        NaN  未找到违禁词   \n7725   1201_535_4294967295_20220411050335        NaN  未找到违禁词   \n7747          202204040803341882660722225        NaN  未找到违禁词   \n7948   C20220411132643AC10212026128536391        NaN  未找到违禁词   \n\n                                             asr_detail  sure_contraband_prob  \\\n4760  [{\"start_time\": 240, \"end_time\": 920, \"text\": ...               0.98065   \n5367  [{\"start_time\": 620, \"end_time\": 1780, \"text\":...               0.98756   \n1953  [{\"start_time\": 2460, \"end_time\": 3080, \"text\"...               0.98777   \n7676  [{\"start_time\": 1340, \"end_time\": 2840, \"text\"...               0.98685   \n3642  [{\"start_time\": 100, \"end_time\": 1240, \"text\":...               0.99149   \n...                                                 ...                   ...   \n6954  [{\"start_time\": 720, \"end_time\": 1160, \"text\":...               0.98024   \n6956  [{\"start_time\": 0, \"end_time\": 300, \"text\": \"喂...               0.98540   \n7725  [{\"start_time\": 60, \"end_time\": 840, \"text\": \"...               0.98580   \n7747  [{\"start_time\": 480, \"end_time\": 1180, \"text\":...               0.98172   \n7948  [{\"start_time\": 0, \"end_time\": 2380, \"text\": \"...               0.99193   \n\n                                             airesponse contraband_word  \n4760  {\"contraband_infos\":[{\"not_sure_contraband\":[]...            
[锯末]  \n5367  {\"contraband_infos\":[{\"not_sure_contraband\":[]...           [下脚料]  \n1953  {\"contraband_infos\":[{\"not_sure_contraband\":[]...            [废钢]  \n7676  {\"contraband_infos\":[{\"not_sure_contraband\":[]...            [烟叶]  \n3642  {\"contraband_infos\":[{\"not_sure_contraband\":[]...           [废塑料]  \n...                                                 ...             ...  \n6954  {\"contraband_infos\":[{\"not_sure_contraband\":[]...            [油漆]  \n6956  {\"contraband_infos\":[{\"not_sure_contraband\":[]...            [镁粉]  \n7725  {\"contraband_infos\":[{\"not_sure_contraband\":[]...            [棉花]  \n7747  {\"contraband_infos\":[{\"not_sure_contraband\":[]...            [吗啡]  \n7948  {\"contraband_infos\":[{\"not_sure_contraband\":[]...           [下脚料]  \n\n[281 rows x 7 columns]",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>call_id</th>\n      <th>cargo_tag</th>\n      <th>reason</th>\n      <th>asr_detail</th>\n      <th>sure_contraband_prob</th>\n      <th>airesponse</th>\n      <th>contraband_word</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>4760</th>\n      <td>1201_9715_4294967295_20220411055430</td>\n      <td>1.0</td>\n      <td>确认是违禁品</td>\n      <td>[{\"start_time\": 240, \"end_time\": 920, \"text\": ...</td>\n      <td>0.98065</td>\n      <td>{\"contraband_infos\":[{\"not_sure_contraband\":[]...</td>\n      <td>[锯末]</td>\n    </tr>\n    <tr>\n      <th>5367</th>\n      <td>202204051957381807040422195</td>\n      <td>1.0</td>\n      <td>确认是违禁品</td>\n      <td>[{\"start_time\": 620, \"end_time\": 1780, \"text\":...</td>\n      <td>0.98756</td>\n      <td>{\"contraband_infos\":[{\"not_sure_contraband\":[]...</td>\n      <td>[下脚料]</td>\n    </tr>\n    <tr>\n      <th>1953</th>\n      <td>C20220411165841AC10210529579346777</td>\n      <td>1.0</td>\n      <td>确认是违禁品</td>\n      <td>[{\"start_time\": 2460, \"end_time\": 3080, \"text\"...</td>\n      <td>0.98777</td>\n      <td>{\"contraband_infos\":[{\"not_sure_contraband\":[]...</td>\n      <td>[废钢]</td>\n    </tr>\n    <tr>\n      <th>7676</th>\n      <td>563334022344810496</td>\n      <td>1.0</td>\n      <td>确认是违禁品</td>\n      <td>[{\"start_time\": 1340, \"end_time\": 2840, \"text\"...</td>\n      <td>0.98685</td>\n      <td>{\"contraband_infos\":[{\"not_sure_contraband\":[]...</td>\n      <td>[烟叶]</td>\n    </tr>\n    <tr>\n      <th>3642</th>\n      <td>C20220412144732AC10213921271988858</td>\n      <td>1.0</td>\n      
<td>确认是违禁品</td>\n      <td>[{\"start_time\": 100, \"end_time\": 1240, \"text\":...</td>\n      <td>0.99149</td>\n      <td>{\"contraband_infos\":[{\"not_sure_contraband\":[]...</td>\n      <td>[废塑料]</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>6954</th>\n      <td>C20220408103208AC10211907961862223</td>\n      <td>NaN</td>\n      <td>未找到违禁词</td>\n      <td>[{\"start_time\": 720, \"end_time\": 1160, \"text\":...</td>\n      <td>0.98024</td>\n      <td>{\"contraband_infos\":[{\"not_sure_contraband\":[]...</td>\n      <td>[油漆]</td>\n    </tr>\n    <tr>\n      <th>6956</th>\n      <td>C20220408123048AC10210586403137838</td>\n      <td>NaN</td>\n      <td>未找到违禁词</td>\n      <td>[{\"start_time\": 0, \"end_time\": 300, \"text\": \"喂...</td>\n      <td>0.98540</td>\n      <td>{\"contraband_infos\":[{\"not_sure_contraband\":[]...</td>\n      <td>[镁粉]</td>\n    </tr>\n    <tr>\n      <th>7725</th>\n      <td>1201_535_4294967295_20220411050335</td>\n      <td>NaN</td>\n      <td>未找到违禁词</td>\n      <td>[{\"start_time\": 60, \"end_time\": 840, \"text\": \"...</td>\n      <td>0.98580</td>\n      <td>{\"contraband_infos\":[{\"not_sure_contraband\":[]...</td>\n      <td>[棉花]</td>\n    </tr>\n    <tr>\n      <th>7747</th>\n      <td>202204040803341882660722225</td>\n      <td>NaN</td>\n      <td>未找到违禁词</td>\n      <td>[{\"start_time\": 480, \"end_time\": 1180, \"text\":...</td>\n      <td>0.98172</td>\n      <td>{\"contraband_infos\":[{\"not_sure_contraband\":[]...</td>\n      <td>[吗啡]</td>\n    </tr>\n    <tr>\n      <th>7948</th>\n      <td>C20220411132643AC10212026128536391</td>\n      <td>NaN</td>\n      <td>未找到违禁词</td>\n      <td>[{\"start_time\": 0, \"end_time\": 2380, \"text\": \"...</td>\n      <td>0.99193</td>\n      <td>{\"contraband_infos\":[{\"not_sure_contraband\":[]...</td>\n      <td>[下脚料]</td>\n  
  </tr>\n  </tbody>\n</table>\n<p>281 rows × 7 columns</p>\n</div>"
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "outputs": [
    {
     "data": {
      "text/plain": "                                  call_id  cargo_tag  reason  \\\n4760  1201_9715_4294967295_20220411055430        1.0  确认是违禁品   \n5367          202204051957381807040422195        1.0  确认是违禁品   \n1953   C20220411165841AC10210529579346777        1.0  确认是违禁品   \n7676                   563334022344810496        1.0  确认是违禁品   \n3642   C20220412144732AC10213921271988858        1.0  确认是违禁品   \n...                                   ...        ...     ...   \n6954   C20220408103208AC10211907961862223        NaN  未找到违禁词   \n6956   C20220408123048AC10210586403137838        NaN  未找到违禁词   \n7725   1201_535_4294967295_20220411050335        NaN  未找到违禁词   \n7747          202204040803341882660722225        NaN  未找到违禁词   \n7948   C20220411132643AC10212026128536391        NaN  未找到违禁词   \n\n                                             asr_detail  sure_contraband_prob  \\\n4760  [{\"start_time\": 240, \"end_time\": 920, \"text\": ...               0.98065   \n5367  [{\"start_time\": 620, \"end_time\": 1780, \"text\":...               0.98756   \n1953  [{\"start_time\": 2460, \"end_time\": 3080, \"text\"...               0.98777   \n7676  [{\"start_time\": 1340, \"end_time\": 2840, \"text\"...               0.98685   \n3642  [{\"start_time\": 100, \"end_time\": 1240, \"text\":...               0.99149   \n...                                                 ...                   ...   \n6954  [{\"start_time\": 720, \"end_time\": 1160, \"text\":...               0.98024   \n6956  [{\"start_time\": 0, \"end_time\": 300, \"text\": \"喂...               0.98540   \n7725  [{\"start_time\": 60, \"end_time\": 840, \"text\": \"...               0.98580   \n7747  [{\"start_time\": 480, \"end_time\": 1180, \"text\":...               0.98172   \n7948  [{\"start_time\": 0, \"end_time\": 2380, \"text\": \"...               0.99193   \n\n                                             airesponse contraband_word  \\\n4760  {\"contraband_infos\":[{\"not_sure_contraband\":[]...          
  [锯末]   \n5367  {\"contraband_infos\":[{\"not_sure_contraband\":[]...           [下脚料]   \n1953  {\"contraband_infos\":[{\"not_sure_contraband\":[]...            [废钢]   \n7676  {\"contraband_infos\":[{\"not_sure_contraband\":[]...            [烟叶]   \n3642  {\"contraband_infos\":[{\"not_sure_contraband\":[]...           [废塑料]   \n...                                                 ...             ...   \n6954  {\"contraband_infos\":[{\"not_sure_contraband\":[]...            [油漆]   \n6956  {\"contraband_infos\":[{\"not_sure_contraband\":[]...            [镁粉]   \n7725  {\"contraband_infos\":[{\"not_sure_contraband\":[]...            [棉花]   \n7747  {\"contraband_infos\":[{\"not_sure_contraband\":[]...            [吗啡]   \n7948  {\"contraband_infos\":[{\"not_sure_contraband\":[]...           [下脚料]   \n\n                                               dialogue  \n4760  S:你好。D:喂，丹东到鸡西的什么货啊？美女？S:境外的。D:$$锯末$$呗啊。S:嗯。D:...  \n5367  S:喂。D:哎，你好。S:喂，您好，您说。D:哎，我问一下咱们这个有个得这个天津津南小站镇到...  \n1953  D:喂。D:喂。S:81。D:喂喂，能听到吗？S:喂。D:啊，老板同学车到镇上那个废铁$$废...  \n7676  S:喂，你好。D:喂，你好，问一下那个1-2吨服装视频给多少钱？S:啊，那个拉的时间也给1万...  \n3642  D:喂，你好。S:喂，兄弟。S:那没错，安溪到到平阳这边拉$$废塑料$$，长斑的多少钱呢？D...  \n...                                                 ...  \n6954  S:喂喂。D:哎，你好。S:啊，你好。D:你那会穿的三次吨什么样的七匹狼？S:他就是那些汽车...  \n6956  S:喂。D:喂喂，老板你这个货能订不能？S:哪个货啊？D:结果那个红灯那个货吗？S:啊，我还...  \n7725  S:喂，你好。D:你那个。D:从郑州里面唐古拉$$棉花$$，拉到换岗，那给多少钱呢？D:6米...  \n7747  S:哎，你好。D:喂，你好，解释的光棍那个，呃，要出多少钱？一吨？S:750明天装。D:明天...  \n7948  S:啊。D:哎，喂，你好。S:哎。D:啊，没装，他们大邱庄到徐水装那个管。S:激活。D:多少...  \n\n[281 rows x 8 columns]",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>call_id</th>\n      <th>cargo_tag</th>\n      <th>reason</th>\n      <th>asr_detail</th>\n      <th>sure_contraband_prob</th>\n      <th>airesponse</th>\n      <th>contraband_word</th>\n      <th>dialogue</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>4760</th>\n      <td>1201_9715_4294967295_20220411055430</td>\n      <td>1.0</td>\n      <td>确认是违禁品</td>\n      <td>[{\"start_time\": 240, \"end_time\": 920, \"text\": ...</td>\n      <td>0.98065</td>\n      <td>{\"contraband_infos\":[{\"not_sure_contraband\":[]...</td>\n      <td>[锯末]</td>\n      <td>S:你好。D:喂，丹东到鸡西的什么货啊？美女？S:境外的。D:$$锯末$$呗啊。S:嗯。D:...</td>\n    </tr>\n    <tr>\n      <th>5367</th>\n      <td>202204051957381807040422195</td>\n      <td>1.0</td>\n      <td>确认是违禁品</td>\n      <td>[{\"start_time\": 620, \"end_time\": 1780, \"text\":...</td>\n      <td>0.98756</td>\n      <td>{\"contraband_infos\":[{\"not_sure_contraband\":[]...</td>\n      <td>[下脚料]</td>\n      <td>S:喂。D:哎，你好。S:喂，您好，您说。D:哎，我问一下咱们这个有个得这个天津津南小站镇到...</td>\n    </tr>\n    <tr>\n      <th>1953</th>\n      <td>C20220411165841AC10210529579346777</td>\n      <td>1.0</td>\n      <td>确认是违禁品</td>\n      <td>[{\"start_time\": 2460, \"end_time\": 3080, \"text\"...</td>\n      <td>0.98777</td>\n      <td>{\"contraband_infos\":[{\"not_sure_contraband\":[]...</td>\n      <td>[废钢]</td>\n      <td>D:喂。D:喂。S:81。D:喂喂，能听到吗？S:喂。D:啊，老板同学车到镇上那个废铁$$废...</td>\n    </tr>\n    <tr>\n      <th>7676</th>\n      <td>563334022344810496</td>\n      <td>1.0</td>\n      <td>确认是违禁品</td>\n      <td>[{\"start_time\": 1340, \"end_time\": 2840, \"text\"...</td>\n      
<td>0.98685</td>\n      <td>{\"contraband_infos\":[{\"not_sure_contraband\":[]...</td>\n      <td>[烟叶]</td>\n      <td>S:喂，你好。D:喂，你好，问一下那个1-2吨服装视频给多少钱？S:啊，那个拉的时间也给1万...</td>\n    </tr>\n    <tr>\n      <th>3642</th>\n      <td>C20220412144732AC10213921271988858</td>\n      <td>1.0</td>\n      <td>确认是违禁品</td>\n      <td>[{\"start_time\": 100, \"end_time\": 1240, \"text\":...</td>\n      <td>0.99149</td>\n      <td>{\"contraband_infos\":[{\"not_sure_contraband\":[]...</td>\n      <td>[废塑料]</td>\n      <td>D:喂，你好。S:喂，兄弟。S:那没错，安溪到到平阳这边拉$$废塑料$$，长斑的多少钱呢？D...</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>6954</th>\n      <td>C20220408103208AC10211907961862223</td>\n      <td>NaN</td>\n      <td>未找到违禁词</td>\n      <td>[{\"start_time\": 720, \"end_time\": 1160, \"text\":...</td>\n      <td>0.98024</td>\n      <td>{\"contraband_infos\":[{\"not_sure_contraband\":[]...</td>\n      <td>[油漆]</td>\n      <td>S:喂喂。D:哎，你好。S:啊，你好。D:你那会穿的三次吨什么样的七匹狼？S:他就是那些汽车...</td>\n    </tr>\n    <tr>\n      <th>6956</th>\n      <td>C20220408123048AC10210586403137838</td>\n      <td>NaN</td>\n      <td>未找到违禁词</td>\n      <td>[{\"start_time\": 0, \"end_time\": 300, \"text\": \"喂...</td>\n      <td>0.98540</td>\n      <td>{\"contraband_infos\":[{\"not_sure_contraband\":[]...</td>\n      <td>[镁粉]</td>\n      <td>S:喂。D:喂喂，老板你这个货能订不能？S:哪个货啊？D:结果那个红灯那个货吗？S:啊，我还...</td>\n    </tr>\n    <tr>\n      <th>7725</th>\n      <td>1201_535_4294967295_20220411050335</td>\n      <td>NaN</td>\n      <td>未找到违禁词</td>\n      <td>[{\"start_time\": 60, \"end_time\": 840, \"text\": \"...</td>\n      <td>0.98580</td>\n      <td>{\"contraband_infos\":[{\"not_sure_contraband\":[]...</td>\n      <td>[棉花]</td>\n      <td>S:喂，你好。D:你那个。D:从郑州里面唐古拉$$棉花$$，拉到换岗，那给多少钱呢？D:6米...</td>\n    </tr>\n    <tr>\n      <th>7747</th>\n      
<td>202204040803341882660722225</td>\n      <td>NaN</td>\n      <td>未找到违禁词</td>\n      <td>[{\"start_time\": 480, \"end_time\": 1180, \"text\":...</td>\n      <td>0.98172</td>\n      <td>{\"contraband_infos\":[{\"not_sure_contraband\":[]...</td>\n      <td>[吗啡]</td>\n      <td>S:哎，你好。D:喂，你好，解释的光棍那个，呃，要出多少钱？一吨？S:750明天装。D:明天...</td>\n    </tr>\n    <tr>\n      <th>7948</th>\n      <td>C20220411132643AC10212026128536391</td>\n      <td>NaN</td>\n      <td>未找到违禁词</td>\n      <td>[{\"start_time\": 0, \"end_time\": 2380, \"text\": \"...</td>\n      <td>0.99193</td>\n      <td>{\"contraband_infos\":[{\"not_sure_contraband\":[]...</td>\n      <td>[下脚料]</td>\n      <td>S:啊。D:哎，喂，你好。S:哎。D:啊，没装，他们大邱庄到徐水装那个管。S:激活。D:多少...</td>\n    </tr>\n  </tbody>\n</table>\n<p>281 rows × 8 columns</p>\n</div>"
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "outputs": [
    {
     "data": {
      "text/plain": "                                  call_id  reason contraband_word  \\\n4760  1201_9715_4294967295_20220411055430  确认是违禁品            [锯末]   \n5367          202204051957381807040422195  确认是违禁品           [下脚料]   \n1953   C20220411165841AC10210529579346777  确认是违禁品            [废钢]   \n7676                   563334022344810496  确认是违禁品            [烟叶]   \n3642   C20220412144732AC10213921271988858  确认是违禁品           [废塑料]   \n...                                   ...     ...             ...   \n6954   C20220408103208AC10211907961862223  未找到违禁词            [油漆]   \n6956   C20220408123048AC10210586403137838  未找到违禁词            [镁粉]   \n7725   1201_535_4294967295_20220411050335  未找到违禁词            [棉花]   \n7747          202204040803341882660722225  未找到违禁词            [吗啡]   \n7948   C20220411132643AC10212026128536391  未找到违禁词           [下脚料]   \n\n                                               dialogue  \n4760  S:你好。D:喂，丹东到鸡西的什么货啊？美女？S:境外的。D:$$锯末$$呗啊。S:嗯。D:...  \n5367  S:喂。D:哎，你好。S:喂，您好，您说。D:哎，我问一下咱们这个有个得这个天津津南小站镇到...  \n1953  D:喂。D:喂。S:81。D:喂喂，能听到吗？S:喂。D:啊，老板同学车到镇上那个废铁$$废...  \n7676  S:喂，你好。D:喂，你好，问一下那个1-2吨服装视频给多少钱？S:啊，那个拉的时间也给1万...  \n3642  D:喂，你好。S:喂，兄弟。S:那没错，安溪到到平阳这边拉$$废塑料$$，长斑的多少钱呢？D...  \n...                                                 ...  \n6954  S:喂喂。D:哎，你好。S:啊，你好。D:你那会穿的三次吨什么样的七匹狼？S:他就是那些汽车...  \n6956  S:喂。D:喂喂，老板你这个货能订不能？S:哪个货啊？D:结果那个红灯那个货吗？S:啊，我还...  \n7725  S:喂，你好。D:你那个。D:从郑州里面唐古拉$$棉花$$，拉到换岗，那给多少钱呢？D:6米...  \n7747  S:哎，你好。D:喂，你好，解释的光棍那个，呃，要出多少钱？一吨？S:750明天装。D:明天...  \n7948  S:啊。D:哎，喂，你好。S:哎。D:啊，没装，他们大邱庄到徐水装那个管。S:激活。D:多少...  \n\n[281 rows x 4 columns]",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>call_id</th>\n      <th>reason</th>\n      <th>contraband_word</th>\n      <th>dialogue</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>4760</th>\n      <td>1201_9715_4294967295_20220411055430</td>\n      <td>确认是违禁品</td>\n      <td>[锯末]</td>\n      <td>S:你好。D:喂，丹东到鸡西的什么货啊？美女？S:境外的。D:$$锯末$$呗啊。S:嗯。D:...</td>\n    </tr>\n    <tr>\n      <th>5367</th>\n      <td>202204051957381807040422195</td>\n      <td>确认是违禁品</td>\n      <td>[下脚料]</td>\n      <td>S:喂。D:哎，你好。S:喂，您好，您说。D:哎，我问一下咱们这个有个得这个天津津南小站镇到...</td>\n    </tr>\n    <tr>\n      <th>1953</th>\n      <td>C20220411165841AC10210529579346777</td>\n      <td>确认是违禁品</td>\n      <td>[废钢]</td>\n      <td>D:喂。D:喂。S:81。D:喂喂，能听到吗？S:喂。D:啊，老板同学车到镇上那个废铁$$废...</td>\n    </tr>\n    <tr>\n      <th>7676</th>\n      <td>563334022344810496</td>\n      <td>确认是违禁品</td>\n      <td>[烟叶]</td>\n      <td>S:喂，你好。D:喂，你好，问一下那个1-2吨服装视频给多少钱？S:啊，那个拉的时间也给1万...</td>\n    </tr>\n    <tr>\n      <th>3642</th>\n      <td>C20220412144732AC10213921271988858</td>\n      <td>确认是违禁品</td>\n      <td>[废塑料]</td>\n      <td>D:喂，你好。S:喂，兄弟。S:那没错，安溪到到平阳这边拉$$废塑料$$，长斑的多少钱呢？D...</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>6954</th>\n      <td>C20220408103208AC10211907961862223</td>\n      <td>未找到违禁词</td>\n      <td>[油漆]</td>\n      <td>S:喂喂。D:哎，你好。S:啊，你好。D:你那会穿的三次吨什么样的七匹狼？S:他就是那些汽车...</td>\n    </tr>\n    <tr>\n      <th>6956</th>\n      <td>C20220408123048AC10210586403137838</td>\n      <td>未找到违禁词</td>\n      <td>[镁粉]</td>\n      
<td>S:喂。D:喂喂，老板你这个货能订不能？S:哪个货啊？D:结果那个红灯那个货吗？S:啊，我还...</td>\n    </tr>\n    <tr>\n      <th>7725</th>\n      <td>1201_535_4294967295_20220411050335</td>\n      <td>未找到违禁词</td>\n      <td>[棉花]</td>\n      <td>S:喂，你好。D:你那个。D:从郑州里面唐古拉$$棉花$$，拉到换岗，那给多少钱呢？D:6米...</td>\n    </tr>\n    <tr>\n      <th>7747</th>\n      <td>202204040803341882660722225</td>\n      <td>未找到违禁词</td>\n      <td>[吗啡]</td>\n      <td>S:哎，你好。D:喂，你好，解释的光棍那个，呃，要出多少钱？一吨？S:750明天装。D:明天...</td>\n    </tr>\n    <tr>\n      <th>7948</th>\n      <td>C20220411132643AC10212026128536391</td>\n      <td>未找到违禁词</td>\n      <td>[下脚料]</td>\n      <td>S:啊。D:哎，喂，你好。S:哎。D:啊，没装，他们大邱庄到徐水装那个管。S:激活。D:多少...</td>\n    </tr>\n  </tbody>\n</table>\n<p>281 rows × 4 columns</p>\n</div>"
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "outputs": [],
   "source": [],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}